'\" te
.\" Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
.\" Copyright 2022 Joyent, Inc.
.\" Copyright (c) 2002, Sun Microsystems, Inc. All Rights Reserved.
.\" The contents of this file are subject to the terms of the Common Development and Distribution License (the "License").  You may not use this file except in compliance with the License.
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE or http://www.opensolaris.org/os/licensing.  See the License for the specific language governing permissions and limitations under the License.
.\" When distributing Covered Code, include this CDDL HEADER in each file and include the License file at usr/src/OPENSOLARIS.LICENSE.  If applicable, add the following below this CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your own identifying information: Portions Copyright [yyyy] [name of copyright owner]
.TH KMEM_CACHE_CREATE 9F "Feb 18, 2015"
.SH NAME
kmem_cache_create, kmem_cache_alloc, kmem_cache_free, kmem_cache_destroy,
kmem_cache_set_move \- kernel memory cache allocator operations
.SH SYNOPSIS
.nf
#include <sys/types.h>
#include <sys/kmem.h>

\fBkmem_cache_t *\fR\fBkmem_cache_create\fR(\fBchar *\fR\fIname\fR, \fBsize_t\fR \fIbufsize\fR,
     \fBsize_t\fR \fIalign\fR, \fBint\fR (*\fIconstructor\fR)(void *, void *, int),
     \fBvoid\fR (*\fIdestructor\fR)(void *, void *), \fBvoid\fR (*\fIreclaim\fR)(void *),
     \fBvoid\fR *\fIprivate\fR, \fBvoid\fR *\fIvmp\fR, \fBint\fR \fIcflags\fR);
.fi

.LP
.nf
\fBvoid\fR \fBkmem_cache_destroy\fR(\fBkmem_cache_t\fR *\fIcp\fR);
.fi

.LP
.nf
\fBvoid *\fR\fBkmem_cache_alloc\fR(\fBkmem_cache_t\fR *\fIcp\fR, \fBint\fR \fIkmflag\fR);
.fi

.LP
.nf
\fBvoid\fR \fBkmem_cache_free\fR(\fBkmem_cache_t\fR *\fIcp\fR, \fBvoid\fR *\fIobj\fR);
.fi

.LP
.nf
\fBvoid\fR \fBkmem_cache_set_move\fR(\fBkmem_cache_t\fR *\fIcp\fR, \fBkmem_cbrc_t\fR (*\fImove\fR)(\fBvoid\fR *,
     \fBvoid\fR *, \fBsize_t\fR, \fBvoid\fR *));
.fi

.LP
.nf
 [Synopsis for callback functions:]
.fi

.LP
.nf
\fBint\fR (*\fIconstructor\fR)(\fBvoid\fR *\fIbuf\fR, \fBvoid\fR *\fIuser_arg\fR, \fBint\fR \fIkmflags\fR);
.fi

.LP
.nf
\fBvoid\fR (*\fIdestructor\fR)(\fBvoid\fR *\fIbuf\fR, \fBvoid\fR *\fIuser_arg\fR);
.fi

.LP
.nf
\fBkmem_cbrc_t\fR (*\fImove\fR)(\fBvoid\fR *\fIold\fR, \fBvoid\fR *\fInew\fR, \fBsize_t\fR \fIbufsize\fR,
     \fBvoid\fR *\fIuser_arg\fR);
.fi

.SH INTERFACE LEVEL
illumos DDI specific (illumos DDI)
.SH PARAMETERS
The parameters for the \fBkmem_cache_*\fR functions are as follows:
.sp
.ne 2
.na
\fB\fIname\fR\fR
.ad
.RS 15n
Descriptive name of a \fBkstat\fR(9S) structure of class \fBkmem_cache\fR.
Names longer than 31 characters are truncated.
.RE

.sp
.ne 2
.na
\fB\fIbufsize\fR\fR
.ad
.RS 15n
Size of the objects it manages.
.RE

.sp
.ne 2
.na
\fB\fIalign\fR\fR
.ad
.RS 15n
Required object alignment.
.RE

.sp
.ne 2
.na
\fB\fIconstructor\fR\fR
.ad
.RS 15n
Pointer to an object constructor function. Parameters are defined below.
.RE

.sp
.ne 2
.na
\fB\fIdestructor\fR\fR
.ad
.RS 15n
Pointer to an object destructor function. Parameters are defined below.
.RE

.sp
.ne 2
.na
\fB\fIreclaim\fR\fR
.ad
.RS 15n
Drivers should pass \fBNULL\fR.
.RE

.sp
.ne 2
.na
\fB\fIprivate\fR\fR
.ad
.RS 15n
Pass-through argument for constructor/destructor.
.RE

.sp
.ne 2
.na
\fB\fIvmp\fR\fR
.ad
.RS 15n
Drivers should pass \fBNULL\fR.
.RE

.sp
.ne 2
.na
\fB\fIcflags\fR\fR
.ad
.RS 15n
Drivers must pass 0.
.RE

.sp
.ne 2
.na
\fB\fIkmflag\fR\fR
.ad
.RS 15n
Possible flags are:
.sp
.ne 2
.na
\fB\fBKM_SLEEP\fR\fR
.ad
.RS 15n
Allow sleeping (blocking) until memory is available.
.RE

.sp
.ne 2
.na
\fB\fBKM_NOSLEEP\fR\fR
.ad
.RS 15n
Return NULL immediately if memory is not available, but after an aggressive
reclaiming attempt.  Any mention of \fBKM_NOSLEEP\fR without mentioning
\fBKM_NOSLEEP_LAZY\fR (see below) applies to both values.
.RE

.sp
.ne 2
.na
\fB\fBKM_NOSLEEP_LAZY\fR\fR
.ad
.RS 15n
Return NULL immediately if memory is not available, without the aggressive
reclaiming attempt.  This is actually two flags combined:
(\fBKM_NOSLEEP\fR | \fBKM_NORMALPRI\fR), the latter flag indicating not to
attempt reclamation before giving up and returning NULL.
.RE

.sp
.ne 2
.na
\fB\fBKM_PUSHPAGE\fR\fR
.ad
.RS 15n
Allow the allocation to use reserved memory.
.RE

.RE

.sp
.ne 2
.na
\fB\fIobj\fR\fR
.ad
.RS 15n
Pointer to the object allocated by \fBkmem_cache_alloc()\fR.
.RE

.sp
.ne 2
.na
\fB\fImove\fR\fR
.ad
.RS 15n
Pointer to an object relocation function. Parameters are defined below.
.RE

.sp
.LP
The parameters for the callback constructor function are as follows:
.sp
.ne 2
.na
\fB\fBvoid *\fIbuf\fR\fR\fR
.ad
.RS 18n
Pointer to the object to be constructed.
.RE

.sp
.ne 2
.na
\fB\fBvoid *\fIuser_arg\fR\fR\fR
.ad
.RS 18n
The \fIprivate\fR parameter from the call to \fBkmem_cache_create()\fR; it is
typically a pointer to the soft-state structure.
.RE

.sp
.ne 2
.na
\fB\fBint \fIkmflags\fR\fR\fR
.ad
.RS 18n
Propagated \fIkmflag\fR values.
.RE

.sp
.LP
The parameters for the callback destructor function are as follows:
.sp
.ne 2
.na
\fB\fBvoid *\fIbuf\fR\fR\fR
.ad
.RS 18n
Pointer to the object to be destroyed.
.RE

.sp
.ne 2
.na
\fB\fBvoid *\fIuser_arg\fR\fR\fR
.ad
.RS 18n
The \fIprivate\fR parameter from the call to \fBkmem_cache_create()\fR; it is
typically a pointer to the soft-state structure.
.RE

.sp
.LP
The parameters for the callback \fBmove()\fR function are as follows:
.sp
.ne 2
.na
\fB\fBvoid *\fIold\fR\fR\fR
.ad
.RS 18n
Pointer to the object to be moved.
.RE

.sp
.ne 2
.na
\fB\fBvoid *\fInew\fR\fR\fR
.ad
.RS 18n
Pointer to the object that serves as the copy destination for the contents of
the \fIold\fR parameter.
.RE

.sp
.ne 2
.na
\fB\fBsize_t \fIbufsize\fR\fR\fR
.ad
.RS 18n
Size of the object to be moved.
.RE

.sp
.ne 2
.na
\fB\fBvoid *\fIuser_arg\fR\fR\fR
.ad
.RS 18n
The \fIprivate\fR parameter from the call to \fBkmem_cache_create()\fR; it is
typically a pointer to the soft-state structure.
.RE

.SH DESCRIPTION
In many cases, the cost of initializing and destroying an object exceeds the
cost of allocating and freeing memory for it. The functions described here
address this condition.
.sp
.LP
Object caching is a technique for dealing with objects that are:
.RS +4
.TP
.ie t \(bu
.el o
frequently allocated and freed, and
.RE
.RS +4
.TP
.ie t \(bu
.el o
have setup and initialization costs.
.RE
.sp
.LP
The idea is to allow the allocator and its clients to cooperate to preserve the
invariant portion of an object's initial state, or constructed state, between
uses, so it does not have to be destroyed and re-created every time the object
is used. For example, an object containing a mutex only needs to have
\fBmutex_init()\fR applied once, the first time the object is allocated. The
object can then be freed and reallocated many times without incurring the
expense of \fBmutex_destroy()\fR and \fBmutex_init()\fR each time. An object's
embedded locks, condition variables, reference counts, lists of other objects,
and read-only data all generally qualify as constructed state. The essential
requirement is that the client must free the object (using
\fBkmem_cache_free()\fR) in its constructed state. The allocator cannot enforce
this, so programming errors will lead to hard-to-find bugs.
.sp
.LP
A driver should call \fBkmem_cache_create()\fR at the time of \fB_init\fR(9E)
or \fBattach\fR(9E), and call the corresponding \fBkmem_cache_destroy()\fR at
the time of \fB_fini\fR(9E) or \fBdetach\fR(9E).
.sp
.LP
\fBkmem_cache_create()\fR creates a cache of objects, each of size
\fIbufsize\fR bytes, aligned on an \fIalign\fR boundary. Drivers not requiring
a specific alignment can pass 0. \fIname\fR identifies the cache for statistics
and debugging. \fIconstructor\fR and \fIdestructor\fR convert plain memory into
objects and back again; \fIconstructor\fR can fail if it needs to allocate
memory but cannot. \fIprivate\fR is a parameter passed to the constructor and
destructor callbacks to support parameterized caches (for example, a pointer to
an instance of the driver's soft-state structure). To facilitate debugging,
\fBkmem_cache_create()\fR creates a \fBkstat\fR(9S) structure of class
\fBkmem_cache\fR and name \fIname\fR. It returns an opaque pointer to the
object cache.
.sp
.LP
\fBkmem_cache_alloc()\fR gets an object from the cache. The object will be in
its constructed state. \fIkmflag\fR has either \fBKM_SLEEP\fR or
\fBKM_NOSLEEP\fR set, indicating whether it is acceptable to wait for memory
if none is currently available.
.sp
.LP
A small pool of reserved memory is available to allow the system to progress
toward the goal of freeing additional memory while in a low memory situation.
The \fBKM_PUSHPAGE\fR flag enables use of this reserved memory pool on an
allocation. This flag can be used by drivers that implement \fBstrategy\fR(9E)
on memory allocations associated with a single I/O operation. The driver
guarantees that the I/O operation will complete (or timeout) and, on
completion, that the memory will be returned. The \fBKM_PUSHPAGE\fR flag should
be used only in \fBkmem_cache_alloc()\fR calls. All allocations from a given
cache should be consistent in their use of the flag. A driver that adheres to
these restrictions can guarantee progress in a low memory situation without
resorting to complex private allocation and queuing schemes. If
\fBKM_PUSHPAGE\fR is specified, \fBKM_SLEEP\fR can also be used without causing
deadlock.
.sp
.LP
\fBkmem_cache_free()\fR returns an object to the cache. The object must be in
its constructed state.
.sp
.LP
\fBkmem_cache_destroy()\fR destroys the cache and releases all associated
resources. All allocated objects must have been previously freed.
.sp
.LP
\fBkmem_cache_set_move()\fR registers a function that the allocator may call to
move objects from sparsely allocated pages of memory so that the system can
reclaim pages that are tied up by the client. Since caching objects of the same
size and type already makes severe memory fragmentation unlikely, there is
generally no need to register such a function. The idea is to make it possible
to limit worst-case fragmentation in caches that exhibit a tendency to become
highly fragmented. Only clients that allocate a mix of long- and short-lived
objects from the same cache are prone to exhibit this tendency, making them
candidates for a \fBmove()\fR callback.
.sp
.LP
The \fBmove()\fR callback supplies the client with two addresses: the allocated
object that the allocator wants to move and a buffer selected by the allocator
for the client to use as the copy destination. The new parameter is an
allocated, constructed object ready to receive the contents of the old
parameter. The \fIbufsize\fR parameter supplies the size of the object, in case
a single move function handles multiple caches whose objects differ only in
size. Finally, the private parameter passed to the constructor and destructor
is also passed to the \fBmove()\fR callback.
.sp
.LP
Only the client knows about its own data and when it is a good time to move it.
The client cooperates with the allocator to return unused memory to the system,
and the allocator accepts this help at the client's convenience. When asked to
move an object, the client can respond with any of the following:
.sp
.in +2
.nf
typedef enum kmem_cbrc {
             KMEM_CBRC_YES,
             KMEM_CBRC_NO,
             KMEM_CBRC_LATER,
             KMEM_CBRC_DONT_NEED,
             KMEM_CBRC_DONT_KNOW
} kmem_cbrc_t;
.fi
.in -2
.sp

.sp
.LP
The client must not explicitly free either of the objects passed to the
\fBmove()\fR callback, since the allocator wants to free them directly to the
slab layer (bypassing the per-CPU magazine layer). The response tells the
allocator which of the two object parameters to free:
.sp
.ne 2
.na
\fB\fBKMEM_CBRC_YES\fR\fR
.ad
.RS 23n
The client moved the object; the allocator frees the old parameter.
.RE

.sp
.ne 2
.na
\fB\fBKMEM_CBRC_NO\fR\fR
.ad
.RS 23n
The client refused to move the object; the allocator frees the new parameter
(the unused copy destination).
.RE

.sp
.ne 2
.na
\fB\fBKMEM_CBRC_LATER\fR\fR
.ad
.RS 23n
The client is using the object and cannot move it now; the allocator frees the
new parameter (the unused copy destination). The client should use
\fBKMEM_CBRC_LATER\fR instead of \fBKMEM_CBRC_NO\fR if the object is likely to
become movable soon.
.RE

.sp
.ne 2
.na
\fB\fBKMEM_CBRC_DONT_NEED\fR\fR
.ad
.RS 23n
The client no longer needs the object; the allocator frees both the old and new
parameters. This response is the client's opportunity to be a model citizen and
give back as much as it can.
.RE

.sp
.ne 2
.na
\fB\fBKMEM_CBRC_DONT_KNOW\fR\fR
.ad
.RS 23n
The client does not know about the object because:
.sp
.ne 2
.na
\fBa)\fR
.ad
.RS 6n
the client has just allocated the object and has not yet put it wherever it
expects to find known objects
.RE

.sp
.ne 2
.na
\fBb)\fR
.ad
.RS 6n
the client has removed the object from wherever it expects to find known
objects and is about to free the object
.RE

.sp
.ne 2
.na
\fBc)\fR
.ad
.RS 6n
the client has freed the object
.RE

In all of these cases above, the allocator frees the new parameter (the unused
copy destination) and searches for the old parameter in the magazine layer. If
the object is found, it is removed from the magazine layer and freed to the
slab layer so that it will no longer tie up an entire page of memory.
.RE

.sp
.LP
Any object passed to the \fBmove()\fR callback is guaranteed to have been
touched only by the allocator or by the client. Because memory patterns applied
by the allocator always set at least one of the two lowest order bits, the
bottom two bits of any pointer member (other than \fBchar *\fR or \fBshort
*\fR, which may not be 8-byte aligned on all platforms) are available to the
client for marking cached objects that the client is about to free. This way,
the client can recognize known objects in the \fBmove()\fR callback by the
unmarked (valid) pointer value.
.sp
.LP
If the client refuses to move an object with either \fBKMEM_CBRC_NO\fR or
\fBKMEM_CBRC_LATER\fR, and that object later becomes movable, the client can
notify the allocator by calling \fBkmem_cache_move_notify()\fR. Alternatively,
the client can simply wait for the allocator to call back again with the same
object address. Responding \fBKMEM_CBRC_NO\fR even once or responding
\fBKMEM_CBRC_LATER\fR too many times for the same object makes the allocator
less likely to call back again for that object.
.LP
.nf
[Synopsis for notification function:]
.fi

.LP
.nf
\fBvoid\fR \fBkmem_cache_move_notify\fR(\fBkmem_cache_t\fR *\fIcp\fR, \fBvoid\fR *\fIobj\fR);
.fi

.sp
.LP
The parameters for the \fBnotification\fR function are as follows:
.sp
.ne 2
.na
\fB\fIcp\fR\fR
.ad
.RS 7n
Pointer to the object cache.
.RE

.sp
.ne 2
.na
\fB\fIobj\fR\fR
.ad
.RS 7n
Pointer to the object that has become movable since an earlier refusal to move
it.
.RE

.SH CONTEXT
Constructors can be invoked during any call to \fBkmem_cache_alloc()\fR, and
will run in that context. Similarly, destructors can be invoked during any call
to \fBkmem_cache_free()\fR, and can also be invoked during
\fBkmem_cache_destroy()\fR. Therefore, the functions that a constructor or
destructor invokes must be appropriate in that context. Furthermore, the
allocator may also call the constructor and destructor on objects still under
its control without client involvement.
.sp
.LP
\fBkmem_cache_create()\fR and \fBkmem_cache_destroy()\fR must not be called
from interrupt context. \fBkmem_cache_create()\fR can also block for available
memory.
.sp
.LP
\fBkmem_cache_alloc()\fR can be called from interrupt context only if the
\fBKM_NOSLEEP\fR flag is set. It can be called from user or kernel context
with any valid flag.
.sp
.LP
\fBkmem_cache_free()\fR can be called from user, kernel, or interrupt context.
.sp
.LP
\fBkmem_cache_set_move()\fR is called from the same context as
\fBkmem_cache_create()\fR, immediately after \fBkmem_cache_create()\fR and
before allocating any objects from the cache.
.sp
.LP
The registered \fBmove()\fR callback is always invoked in the same global
callback thread dedicated for move requests, guaranteeing that no matter how
many clients register a \fBmove()\fR function, the allocator never tries to
move more than one object at a time. Neither the allocator nor the client can
be assumed to know the object's whereabouts at the time of the callback.
.SH EXAMPLES
\fBExample 1 \fRObject Caching
.sp
.LP
Consider the following data structure:

.sp
.in +2
.nf
struct foo {
    kmutex_t foo_lock;
    kcondvar_t foo_cv;
    struct bar *foo_barlist;
    int foo_refcnt;
};
.fi
.in -2

.sp
.LP
Assume that a \fBfoo\fR structure cannot be freed until there are no
outstanding references to it (\fBfoo_refcnt == 0\fR) and all of its pending
\fBbar\fR events (whatever they are) have completed (\fBfoo_barlist ==
NULL\fR). The life cycle of a dynamically allocated \fBfoo\fR would be
something like this:

.sp
.in +2
.nf
foo = kmem_alloc(sizeof (struct foo), KM_SLEEP);
mutex_init(&foo->foo_lock, ...);
cv_init(&foo->foo_cv, ...);
foo->foo_refcnt = 0;
foo->foo_barlist = NULL;
    use foo;
ASSERT(foo->foo_barlist == NULL);
ASSERT(foo->foo_refcnt == 0);
cv_destroy(&foo->foo_cv);
mutex_destroy(&foo->foo_lock);
kmem_free(foo, sizeof (struct foo));
.fi
.in -2

.sp
.LP
Notice that between each use of a \fBfoo\fR object we perform a sequence of
operations that constitutes nothing but expensive overhead. All of this
overhead (that is, everything other than \fBuse foo\fR above) can be eliminated
by object caching.

.sp
.in +2
.nf
int
foo_constructor(void *buf, void *arg, int tags)
{
    struct foo *foo = buf;
    mutex_init(&foo->foo_lock, ...);
    cv_init(&foo->foo_cv, ...);
    foo->foo_refcnt = 0;
    foo->foo_barlist = NULL;
    return (0);
}

void
foo_destructor(void *buf, void *arg)
{
    struct foo *foo = buf;
    ASSERT(foo->foo_barlist == NULL);
    ASSERT(foo->foo_refcnt == 0);
    cv_destroy(&foo->foo_cv);
    mutex_destroy(&foo->foo_lock);
}

user_arg = ddi_get_soft_state(foo_softc, instance);
(void) snprintf(buf, KSTAT_STRLEN, "foo%d_cache",
        ddi_get_instance(dip));
foo_cache = kmem_cache_create(buf,
        sizeof (struct foo), 0,
        foo_constructor, foo_destructor,
        NULL, user_arg, NULL, 0);
.fi
.in -2

.sp
.LP
To allocate, use, and free a \fBfoo\fR object:

.sp
.in +2
.nf
foo = kmem_cache_alloc(foo_cache, KM_SLEEP);
    use foo;
kmem_cache_free(foo_cache, foo);
.fi
.in -2

.sp
.LP
This makes \fBfoo\fR allocation fast, because the allocator will usually do
nothing more than fetch an already-constructed \fBfoo\fR from the cache.
\fBfoo_constructor\fR and \fBfoo_destructor\fR will be invoked only to populate
and drain the cache, respectively.

.LP
\fBExample 2 \fRRegistering a Move Callback
.sp
.LP
To register a \fBmove()\fR callback:

.sp
.in +2
.nf
object_cache = kmem_cache_create(...);
kmem_cache_set_move(object_cache, object_move);
.fi
.in -2

.SH RETURN VALUES
If successful, the constructor function must return \fB0\fR. If
\fBKM_NOSLEEP\fR or \fBKM_NOSLEEP_LAZY\fR is set and memory cannot be
allocated without sleeping, the constructor must return \fB\-1\fR.  If the
constructor takes extraordinary steps during a \fBKM_NOSLEEP\fR construction,
it may not take those for a \fBKM_NOSLEEP_LAZY\fR construction.
.sp
.LP
\fBkmem_cache_create()\fR returns a pointer to the allocated cache.
.sp
.LP
If successful, \fBkmem_cache_alloc()\fR returns a pointer to the allocated
object. If \fBKM_NOSLEEP\fR is set and memory cannot be allocated without
sleeping, \fBkmem_cache_alloc()\fR returns \fBNULL\fR.
.SH ATTRIBUTES
See \fBattributes\fR(7) for descriptions of the following attributes:
.sp

.sp
.TS
box;
c | c
l | l .
ATTRIBUTE TYPE	ATTRIBUTE VALUE
_
Interface Stability	Committed
.TE

.SH SEE ALSO
.BR condvar (9F),
.BR kmem_alloc (9F),
.BR mutex (9F),
.BR kstat (9S)
.sp
.LP
\fIWriting Device Drivers\fR
.sp
.LP
\fIThe Slab Allocator: An Object-Caching Kernel Memory Allocator\fR, Bonwick,
J.; USENIX Summer 1994 Technical Conference (1994).
.sp
.LP
\fIMagazines and vmem: Extending the Slab Allocator to Many CPUs and Arbitrary
Resources\fR, Bonwick, J. and Adams, J.; USENIX 2001 Technical Conference
(2001).
.SH NOTES
The constructor must be immediately reversible by the destructor, since the
allocator may call the constructor and destructor on objects still under its
control at any time without client involvement.
.sp
.LP
The constructor must respect the \fIkmflags\fR argument by forwarding it to
allocations made inside the \fIconstructor\fR, and must not ASSERT anything
about the given flags.
.sp
.LP
The user argument forwarded to the constructor must be fully operational before
it is passed to \fBkmem_cache_create()\fR.
